# NOTE(review): rm(list=ls()) deletes every object in the workspace this script
# is run from; it does not detach packages or reset options. Consider running
# the script in a fresh R session instead.
rm(list=ls())
# ggpubr supplies stat_compare_means() used for the box plot; the ggplot2
# functions used below are presumably made available through it -- TODO confirm.
library(ggpubr)
# TCGAbiolinks supplies getGDCprojects(), used to list the GDC projects.
library(TCGAbiolinks)
# NOTE(review): no function from reshape appears to be called in this file --
# confirm before removing.
library(reshape)
#data("ToothGrowth")
#p <- ggboxplot(ToothGrowth, x = "supp", y = "len",
#               color = "supp", palette = "jco",
#               add = "jitter",
#               facet.by = "dose", short.panel.labs = FALSE,yscale="log2")
# Use only p.format as label. Remove method name.
#p + stat_compare_means(label = "p.format")


# Fetch all GDC project identifiers and keep those whose id contains "TCGA".
all_project_ids <- TCGAbiolinks:::getGDCprojects()$project_id
projects <- all_project_ids[grepl("TCGA", all_project_ids)]

# Directory containing the per-project "<project>Data.csv" and
# "<project>Metadata.csv" files read by the project loop.
path <- "Z:/Bioinformatics/ExternalDatabases/TCGAbiolinksAnalysis/UnnormalizedData/"
# Accumulator for the per-sample rows gathered in the project loop; starts empty.
stor <- NULL
# Alternative gene selections kept for reference:
#Gene = c("COL18A1","COL16A1","COL14A1","COL10A1","COL9A3","COL9A2","COL8A1","COL6A5","COL6A2","COL6A1","COL4A6","COL4A3","COL4A1","COL3A1","COL2A1","COL27A1","COL26A1","COL25A1","COL20A1","COL1A2","COL1A1")
#Gene="LAIR1"
# Gene symbol whose expression is extracted from every project.
Gene <- "LAIR1"

# Collect the per-sample expression of `Gene` for every project that has both
# normal and tumor samples, and append the resulting rows to `stor`.
#
# For each project the loop reads "<project>Metadata.csv" and
# "<project>Data.csv" from `path`, splits the samples listed in the metadata
# into "Solid Tissue Normal" and "Primary solid Tumor", and keeps the project
# only when each group lists more than two samples and the gene is present in
# the expression table. Each kept sample contributes one row:
# project id, expression value, "Normal"/"Tumor".
project_rows <- list()

for (project in projects) {
  metadata <- read.csv(file.path(path, paste0(project, "Metadata.csv")))

  # Sample identifiers in the metadata use "-", while the expression table's
  # column names use "." (presumably because read.csv() sanitised the header
  # -- TODO confirm). gsub() is vectorised, so no per-element loop is needed.
  normal_idx <- which(metadata$tissue.definition == "Solid Tissue Normal")
  tumor_idx <- which(metadata$tissue.definition == "Primary solid Tumor")
  norm <- gsub("-", ".", metadata$cases[normal_idx], fixed = TRUE)
  tumor <- gsub("-", ".", metadata$cases[tumor_idx], fixed = TRUE)

  # Both groups must list more than two samples; otherwise skip the project
  # before paying for the (larger) expression table.
  if (length(norm) <= 2 || length(tumor) <= 2) {
    next
  }

  dat <- read.csv(file.path(path, paste0(project, "Data.csv")))

  # Column X holds "|"-separated identifiers; the first field is matched
  # against `Gene`. %in% avoids silent recycling if `Gene` ever holds several
  # symbols and never yields NA.
  genes <- vapply(
    strsplit(as.character(dat$X), "|", fixed = TRUE),
    function(parts) parts[1],
    character(1)
  )
  gene_rows <- which(genes %in% Gene)
  if (length(gene_rows) == 0) {
    next
  }

  # drop = FALSE keeps a data frame even when a single sample column matches,
  # so the transpose below always yields one row per sample.
  # NOTE: the code after this loop names exactly three columns, so it expects
  # a single matching gene row per project.
  normDat <- dat[gene_rows, colnames(dat) %in% norm, drop = FALSE]
  tumorDat <- dat[gene_rows, colnames(dat) %in% tumor, drop = FALSE]

  norMat <- cbind(project, t(normDat), "Normal")
  rownames(norMat) <- NULL
  tumMat <- cbind(project, t(tumorDat), "Tumor")
  rownames(tumMat) <- NULL

  # Collect per-project pieces and bind once after the loop instead of growing
  # `stor` with rbind() on every iteration.
  project_rows[[length(project_rows) + 1L]] <- rbind(norMat, tumMat)
}

stor <- rbind(stor, do.call(rbind, project_rows))

# Turn the accumulated character matrix into a data frame with one row per
# sample: project id, expression value and disease state.
stor <- as.data.frame(stor)
colnames(stor) <- c("proj", "gene", "DiseaseState")

# The values were stored as text when the rows were assembled with cbind().
# Going through as.character() guarantees the actual values are parsed even if
# the column was turned into a factor (the default before R 4.0), where a bare
# as.numeric() would return the factor's level codes instead.
stor$gene <- as.numeric(as.character(stor$gene))
stor$gene <- log2(stor$gene + 1)
write.csv(stor, "C:/Users/ShaikJ/Desktop/Figs/LAIR1.csv")

# Box plot of log2-transformed expression, Normal vs Tumor, one panel per
# project with free scales; stat_compare_means() adds a comparison label to
# each panel.
# NOTE(review): recent ggplot2 releases deprecate `size` in element_rect() in
# favour of `linewidth` -- confirm the installed version before changing it.
p <- ggplot(stor, aes(DiseaseState, gene, fill = DiseaseState)) +
  facet_wrap(. ~ proj, scales = 'free') +
  stat_compare_means(
    show.legend = FALSE,
    label.x.npc = 0.2,
    label.y.npc = 0.93,
    color = "black",
    size = 3
  ) +
  geom_boxplot(lwd = 1) +
  labs(x = "Disease State", y = "log2(TPM+1)") +
  scale_fill_manual(breaks = c("Normal", "Tumor"), values = c("green", "red")) +
  theme(
    panel.background = element_rect(
      fill = "white", colour = "white", size = 0.5, linetype = "solid"
    ),
    text = element_text(size = 20, face = "bold")
  )

png("U:/NC410Manuscript/Results/LAIR1ExpressionVariousCancersAllCollagens.png", width = 16, height = 9, units = 'in', res = 300)
# Print explicitly: a bare `p` is only auto-printed at the interactive top
# level, so running this file through source() would leave the PNG empty.
print(p)
dev.off()




# Summarise expression per project and disease state for the bar plot.
#
# The bar plot built from `strj` labels its y axis "Median log2(TPM+1)" and is
# saved as "...BarplotMedian.png", so the summary statistic computed here is
# the median (the mean was computed before, contradicting both the axis label
# and the file name).
# Result: one row per project and state with columns `datall` (the summary
# value), `proj` and `DiseaseState`, ordered Normal then Tumor within each
# project.
projects <- unique(stor$proj)

per_project <- lapply(as.character(projects), function(proj_id) {
  proj_rows <- stor[which(stor$proj == proj_id), ]
  state_median <- function(state) {
    median(proj_rows$gene[which(proj_rows$DiseaseState == state)])
  }
  data.frame(
    datall = c(state_median("Normal"), state_median("Tumor")),
    proj = proj_rows$proj[1],
    DiseaseState = c("Normal", "Tumor"),
    stringsAsFactors = FALSE
  )
})
# Bind once instead of growing the data frame inside a loop.
strj <- do.call(rbind, per_project)

# Drop the "TCGA-" prefix so the facet labels show only the cancer code.
strj$proj <- gsub("TCGA-", "", strj$proj)
write.csv(strj, "C:/Users/ShaikJ/Desktop/Figs/LAIR2.csv")

# Bar plot of the per-project summary value (`datall`) for Normal vs Tumor,
# one facet column per project, with the y axis limited to 5-12.5.
# NOTE(review): recent ggplot2 releases deprecate `size` in element_rect() in
# favour of `linewidth` -- confirm the installed version before changing it.
p <- ggplot(strj, aes(x = DiseaseState, y = datall, fill = DiseaseState)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(breaks = c("Normal", "Tumor"), values = rev(gray.colors(2))) +
  theme(
    legend.position = "none",
    panel.background = element_rect(
      fill = "white", colour = "white", size = 0.5, linetype = "solid"
    ),
    text = element_text(size = 20, face = "bold"),
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(color = "black")
  ) +
  facet_grid(cols = vars(proj), scales = "free") +
  xlab("Tissue") +
  ylab("Median log2(TPM+1)") +
  coord_cartesian(ylim = c(5, 12.5))

png("U:/NC410Manuscript/Results/LAIR1ExpressionBarplotMedian.png", width = 20, height = 5, units = 'in', res = 300)
# Print explicitly: a bare `p` is only auto-printed at the interactive top
# level, so running this file through source() would leave the PNG empty.
print(p)
dev.off()


